package com.atguigu.bigdata.test

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * @Author: yqb
 * @Date: 2022/6/8 23:51 
 * @Description: Demo
 * @Version: 1.0
 * */
/**
 * Requirement: from the server log file apache.log, extract the request
 * paths of all requests made on 17 May 2015.
 *
 * Log format: space-separated fields; field 3 is the access time
 * (e.g. "17/05/2015:10:05:03"), field 6 is the request path.
 */
object Need07 {
    def main(args: Array[String]): Unit = {

        // Local Spark context for this small batch job.
        val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("Need07")
        val context = new SparkContext(conf)

        val lines: RDD[String] = context.textFile("datas/apache.log")

        // Project each log line to (accessTime, requestPath).
        val timeAndPath: RDD[(String, String)] = lines.map { line =>
            val fields: Array[String] = line.split(" ")
            (fields(3), fields(6))
        }

        // Keep only records from 17 May 2015; the date prefix of field 3 is
        // "dd/MM/yyyy", so startsWith("17/05/2015") selects exactly that day.
        // Then emit only the request path, as the requirement asks for paths,
        // not (time, path) tuples.
        timeAndPath
            .filter { case (time, _) => time.startsWith("17/05/2015") }
            .map { case (_, path) => path }
            .collect()
            .foreach(println)

        context.stop()
    }
}

/**
 * 从服务器日志数据 apache.log 中获取 2015 年 5 月 17 日的请求路径
 *
 * scala  元组
 * Java   字符串
 * 正则表达式
 */